{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# anomaly → PMML (using Nyoka) \n",
    "\n",
    "### Exporter: Anomaly Detection models (Isolation Forests) \n",
    "### Data Set used: iris\n",
    "\n",
    "### **STEPS**: \n",
    "- Build the model using sklearn Isolation Forests\n",
    "- Build PMML (Data Dictionary, Mining schema, Ouput, PMML) using Nyoka classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pre-processing, Model building (using pipeline) for iris data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-10T06:27:59.647880Z",
     "start_time": "2018-07-10T06:27:58.294880Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " Anomaly detection model is built successfully.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
      "default contamination parameter 0.1 will change in version 0.22 to \"auto\". This will change the predict method behavior.\n",
      "behaviour=\"old\" is deprecated and will be removed in version 0.22. Please use behaviour=\"new\", which makes the decision_function change to match other anomaly detection algorithm API.\n"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler, Imputer\n",
    "from sklearn.ensemble import IsolationForest\n",
    "\n",
    "irisdata = datasets.load_iris()\n",
    "pipe = Pipeline([('standard_scaler',StandardScaler()), ('Imputer',Imputer()), ('model',IsolationForest())])\n",
    "pipe.fit(irisdata.data)\n",
    "\n",
    "print(\"\\n\",\"Anomaly detection model is built successfully.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export the Pipeline object into PMML using the Nyoka package"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-10T06:28:05.359880Z",
     "start_time": "2018-07-10T06:28:04.605880Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "threshold_ attribute is deprecated in 0.20 and will be removed in 0.22.\n"
     ]
    }
   ],
   "source": [
    "from nyoka import skl_to_pmml\n",
    "skl_to_pmml(pipeline=pipe, col_names=irisdata.feature_names, pmml_f_name=\"IsolationForest_model.pmml\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
